# Shrinking and shouting: the political revolt of the declining middle in times of employment polarization
# Introduction to Special Issue on Political Consequences of Technological Change

# Thomas Kurer and Bruno Palier

# Create Figure 1

# November 29, 2018.

# clean
# start from an empty workspace so objects left over from an earlier session
# cannot leak into the figure
rm(list = ls())
# close the active graphics device only if one is open: an unconditional
# dev.off() raises an error (and aborts the script) when no device exists,
# e.g. in a fresh session
if (!is.null(dev.list())) {
  dev.off()
}
# emit a form-feed character (clears the console in IDEs such as RStudio)
cat("\014")

# globals
# high scipen penalty: print numbers in fixed instead of scientific notation
options(scipen = 999)

# packages

# if you are missing any of the following libraries, uncomment the next line and execute the command
# (ggrepel is not attached below, but Figure 1 calls ggrepel::geom_text_repel())
# install.packages(c("tidyverse", "eurostat", "scales", "ggrepel"), dependencies=T)

library(tidyverse)
library(eurostat)

library(scales)
# three colours from the default hue palette, one per task group
# (only referenced by the commented-out country-specific plot further below)
defaultcols <- scales::hue_pal()(3)

# load data from eurostat ----
# both tables are downloaded through the eurostat package

# employment data (table "lfsa_egais")
eurostat <- get_eurostat(id = "lfsa_egais")

# population data for population-weighted plots (table "demo_gind")
population <- get_eurostat(id = "demo_gind")

# data preparation ----

# employment data
# reduce full data to relevant sample:
#   - employed people only (wstatus "EMP")
#   - the 15-64 age bracket (age "Y15-64")
# then derive a numeric calendar year from the first four characters of `time`
# and keep only the variables needed below
eurostat <- eurostat %>%
  filter(wstatus == "EMP", age == "Y15-64") %>%
  mutate(year = as.numeric(substr(time, 1, 4))) %>%
  dplyr::select(geo, year, sex, isco08, values)

# occupation data for both sexes combined (sex "T"); the sex column is dropped
occ <- eurostat %>%
  filter(sex == "T") %>%
  dplyr::select(-sex)

# Task Groups ----

# TOTAL 	Total
# OC1 	Managers
# OC2 	Professionals
# OC3 	Technicians and associate professionals
# OC4 	Clerical support workers
# OC5 	Service and sales workers
# OC6 	Skilled agricultural, forestry and fishery workers
# OC7 	Craft and related trades workers
# OC8 	Plant and machine operators and assemblers
# OC9 	Elementary occupations
# OC0 	Armed forces occupations
# NRP 	No response 

# map ISCO-08 major groups to task groups:
#   1 = OC1-OC3, 2 = OC4/OC6/OC7/OC8, 3 = OC5/OC9
# every other code (TOTAL, OC0, NRP, ...) keeps a missing task and is dropped
# in the aggregation step that follows
occ <- occ %>%
  mutate(
    task = case_when(
      isco08 %in% c("OC1", "OC2", "OC3") ~ 1,
      isco08 %in% c("OC4", "OC6", "OC7", "OC8") ~ 2,
      isco08 %in% c("OC5", "OC9") ~ 3,
      TRUE ~ NA_real_
    )
  )

# "values" = employed persons (in thousand)
# step 1: employment per country, year and task group; rows whose ISCO code
# was not assigned to a task group (task is NA) are discarded
occ <- occ %>%
  group_by(geo, year, task) %>%
  summarise(nr_task = sum(values, na.rm = TRUE)) %>%
  filter(!is.na(task)) %>%
  ungroup()

# step 2: total employment per country-year (over the three task groups) and
# each task group's share in it; the listed geo codes are removed afterwards.
# The result stays grouped by geo and year.
occ <- occ %>%
  group_by(geo, year) %>%
  mutate(
    nr_year = sum(nr_task, na.rm = TRUE),
    taskshare = nr_task / nr_year
  ) %>%
  filter(!geo %in% c("EA17", "EA18", "EA19", "EU15", "EU28", "TR", "ME", "MK", "EU27", "MT"))

# calculate change in employment from first to last year.
# Result: one row per country (geo) and task group with the change in the task
# group's employment share (dshare) between the country's first observed year
# and 2017.

# per country, keep only the rows of its earliest and its latest year
docc <- occ %>% group_by(geo) %>% filter(year==min(year) | year==max(year))
# remember each country's first year (used for the plot label later on)
docc <- docc %>% group_by(geo) %>% mutate(labelmin=min(year))

# transform in wide format
# one column per year holding taskshare; geo, task, nr_task, nr_year and
# labelmin remain as identifying columns (columns 1 to 5), the year columns
# follow from column 6 onwards
# NOTE(review): because nr_task / nr_year stay in the data, the first-year and
# last-year observations of a country presumably remain on separate rows, each
# with NA in the other year columns -- the group-wise means below rely on this
docc <- docc %>%
  spread(year, taskshare) 

# keep first year per country (varying)
# c2017: position of the last column, i.e. the most recent year
# NOTE(review): the name and the `2017` reference below assume that the most
# recent year in the download is 2017 -- confirm before re-running on newer data
c2017 <- length(docc)
# cfirstvar: position of the last year column before the most recent one
cfirstvar <- length(docc)-1

# keep the five identifying columns plus the most recent year column; all
# remaining year columns are collapsed into `first` by a row sum that ignores NA
docc <- cbind.data.frame(docc[c(1:5, c2017)], first = rowSums(docc[,6:cfirstvar], na.rm=T))
# a row sum of exactly 0 means the row holds no first-year share -> set to NA
docc$firstyear <- ifelse(docc$first==0, NA, docc$first)

# bring first-year and 2017 share together within each country x task group:
# the NA-ignoring means pick up the non-missing values of the group's rows;
# dshare is the change in share; groups lacking either value are dropped
docc <- docc  %>%
  group_by(geo, task) %>% mutate(sharefirst=mean(firstyear, na.rm=T), share2017=mean(`2017`, na.rm=T)) %>%
  mutate(dshare = share2017-sharefirst) %>%
  filter(!is.na(dshare))

# the rows of a group now carry the same dshare; keep one row per
# geo / task / dshare combination
docc <- docc[!duplicated(docc[,c('geo', 'task', 'dshare')]),]

# facet label of the form "<geo> (<first year>-2017)", used by the optional
# country-specific plot
docc$label <- paste0(docc$geo, " (", docc$labelmin, "-2017", ")")

# exclude RS, not part of special issue
docc <- filter(docc, geo != "RS")

# country-specific changes, not shown in paper.

# ggplot(docc, aes(x=factor(task), y=dshare, fill=factor(task))) + 
#   geom_bar(width=.5, stat = "identity", position = "identity") + 
#   facet_wrap(~label) +
#   ylab("Change in Relative Share of Employment") + xlab("") +
#   scale_fill_manual("", values = c(defaultcols[1], defaultcols[2], defaultcols[3]),
#                     labels=c("non-routine cognitive", "routine", "non-routine manual")) +
#   theme_bw() + theme(legend.text=element_text(size=11)) + labs(caption="Data Source: Eurostat")


# Figure 1

# countries that remain in the employment data
cntries <- as.vector(unique(docc$geo))

# generate population weights for averaging
# average january population pooled over time since 1991
# the year is kept numeric so that `year > 1990` is a numeric comparison; with
# a character year R would silently fall back to comparing strings
population$year <- as.numeric(substr(population$time, 1, 4))
pw <- population %>%
  filter(indic_de == "JAN" & year > 1990 & geo %in% cntries) %>%
  droplevels() %>%
  group_by(geo) %>%
  # one weight per country: mean of the available yearly values
  summarise(pweight = mean(values, na.rm = TRUE))

# calculate average change across countries. LU is excluded from the country
# dots and from the average (very small sample, disproportionately large
# changes in share)
dall <- docc %>%
  filter(geo != "LU") %>%
  dplyr::select(geo, task, dshare)

# merge pweight from population data (only countries present in both tables
# survive the merge), then take the population-weighted mean change per task
# group
dmean <- merge(dall, pw, by = "geo") %>%
  group_by(task) %>%
  summarise(dwmean = weighted.mean(dshare, pweight))

# rearrange order of task group: 3, 2, 1 (left to right on the x axis)
dall$task_sort <- factor(dall$task, levels = c("3", "2", "1"))
dmean$task_sort <- factor(dmean$task, levels = c("3", "2", "1"))


# Figure 1: for every task group, one hollow point per country (labelled with
# its geo code) showing the change in employment share in percentage points,
# overlaid with a translucent bar for the population-weighted mean change
ggplot(
  data = dall,
  aes(
    x = factor(task_sort),
    y = dshare * 100,
    color = factor(task_sort),
    fill = factor(task_sort)
  )
) +
  # reference line at "no change"
  geom_hline(yintercept = 0, alpha = 0.8) +
  geom_point(shape = 1) +
  ggrepel::geom_text_repel(
    aes(label = geo),
    color = "black", size = 3, segment.color = "grey"
  ) +
  # weighted means come from their own data frame
  geom_bar(
    data = dmean,
    aes(x = factor(task_sort), y = dwmean * 100),
    stat = "identity", alpha = 0.5
  ) +
  labs(x = "", y = "Change in Relative Share of Employment (in p.p.)") +
  # labels follow the level order of task_sort (3, 2, 1)
  scale_x_discrete(labels = c("non-routine manual", "routine", "non-routine cognitive")) +
  theme_bw() +
  theme(
    text = element_text(size = 12),
    axis.text.x = element_text(size = 12),
    axis.text.y = element_text(size = 12),
    legend.position = "none"
  )

# write the plot created above (the most recent ggplot) to an EPS file
ggsave(filename = "Figure1.eps", device = cairo_ps, width = 5, height = 5)
